Identify and remove outliers
summarize = dplyr::summarize
#check basic performance -- remove non-responding subjects
temp = totalData %>% group_by(subID,feature_index) %>% summarize(num_noresponses= sum(keys==-1), trialNum = length(subID), no_resp = sum(keys==-1)/trialNum)
sprintf("total trial number: %i; Number of subjects %i",temp$trialNum[1],dim(temp)[1])
## [1] "total trial number: 231; Number of subjects 81"
head(temp)
## # A tibble: 6 x 5
## # Groups: subID [6]
## subID feature_index num_noresponses trialNum no_resp
## <fct> <int> <int> <int> <dbl>
## 1 "" 2 11 231 0.0476
## 2 10438bd 3 0 231 0
## 3 1050095 2 24 231 0.104
## 4 1070d1e 1 0 231 0
## 5 10f4080 3 0 231 0
## 6 1106b05 2 4 231 0.0173
###quantile: sample quantiles corresponding to the given probabilities
summary(temp$no_resp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.008658 0.045722 0.025974 1.000000
noresp_thre = quantile(temp$no_resp,prob = trial_thre)
sprintf("the last 5percent of non-resp rate (threshold for excluding subjects): %f",noresp_thre)
## [1] "the last 5percent of non-resp rate (threshold for excluding subjects): 0.129870"
ggplot(temp, aes(y= no_resp, x= subID,color = as.factor(feature_index)))+
geom_point()+
geom_hline(yintercept=noresp_thre, linetype="dashed", color = "red", size=0.5)+
labs(title="No responses", x ="subjuect ID", y = "number of no responses")+
plotformat

ggplot(temp, aes(y= no_resp, x= feature_index,color = as.factor(feature_index)))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
geom_hline(yintercept=noresp_thre, linetype="dashed", color = "red", size=0.5)+
labs(title="No responses", x ="subjuect ID", y = "number of no responses")+
plotformat
## Warning: Removed 1 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

#no response subjects
removeSub = temp$subID[temp$no_resp>=noresp_thre]
totalData = totalData%>%filter(!subID %in% removeSub)
totalData = mutate(totalData, subID = as.factor(subID))
demographic_information = demographic_information%>%filter(!SubID %in%removeSub)
sprintf("remove no response subjects: %i; remain subjects %i",length(removeSub),length(unique(totalData$subID)))
## [1] "remove no response subjects: 5; remain subjects 76"
removeSub
## [1] 13599c4 13599c4 1612848 1c24bad 1d9ad2f
## 80 Levels: 10438bd 1050095 1070d1e 10f4080 1106b05 1119c1a ... c223b3d
Behavior analysis
summarize = dplyr::summarize
#over all performance
basic = totalData %>% filter(keys>=0, run==3)%>% group_by(subID)%>% summarize(
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S"),
adjFARate = 1/(2*sum(SameDiff=="D")),
adjHitRate = 1-(1/(2*sum(SameDiff=="S"))))
basic$FARate[basic$FARate==0]=basic$adjFARate[basic$FARate==0]
basic$HitRate[basic$HitRate[!is.na(basic$HitRate)]==1]=basic$adjHitRate[basic$HitRate[!is.na(basic$HitRate)]==1]
row_index = basic$CRRate==1 & basic$MissRate==1
basic$FARate[row_index]=NA
basic$HitRate[row_index]=NA
basic$adjFARate[row_index]=NA
basic$adjHitRate[row_index]=NA
row_index = basic$CRRate==0 & basic$MissRate==0
basic$FARate[row_index]=NA
basic$HitRate[row_index]=NA
basic$adjFARate[row_index]=NA
basic$adjHitRate[row_index]=NA
basic = mutate(basic, d_p = qnorm(HitRate)-qnorm(FARate))
#Plotting
basic_longform = gather(basic, response_type, number_resp, HitRate,FARate,CRRate,MissRate)
head(basic_longform)
## # A tibble: 6 x 10
## subID hit fa cr miss adjFARate adjHitRate d_p response_type
## <fct> <int> <int> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 "" 28 9 24 9 0.0152 0.986 1.30 HitRate
## 2 1043… 42 0 35 0 0.0143 0.988 4.45 HitRate
## 3 1050… 31 12 17 7 0.0172 0.987 1.12 HitRate
## 4 1070… 41 2 33 1 0.0143 0.988 3.56 HitRate
## 5 10f4… 41 1 34 1 0.0143 0.988 3.88 HitRate
## 6 1106… 41 1 33 1 0.0147 0.988 3.87 HitRate
## # ... with 1 more variable: number_resp <dbl>
ggplot(basic_longform, aes(x = response_type, y = number_resp, color = response_type))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Proportaion response type", x = "response type", y = "proportion")+
plotformat
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

#d-p
ggplot(basic, aes(x = subID, y = d_p, color = as.factor(subID)))+
geom_point(size=3)+
labs(title="d prime", x = "subject", y = "d prime")+
plotformat
## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(basic,aes(d_p))+
geom_histogram()+
labs(title="d prime histogram", x = "d prime", y = "counts")+
plotformat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

#bad d_p
check subjects with low d-p & save survey data
removedp_thre = quantile(basic$d_p[!is.na(basic$d_p)],prob = d_p_thre)
removeSubdp = basic$subID[basic$d_p<removedp_thre | is.na(basic$d_p)]
sprintf("low d-prime %f. subjects: %i",removedp_thre,length(removeSubdp))
## [1] "low d-prime 0.722628. subjects: 9"
remainSubdp = basic$subID[basic$d_p>d_p_thre]
sprintf("pass d-prim threshould: %i", length(remainSubdp))
## [1] "pass d-prim threshould: 72"
#sprintf("remove d prime lower than %f. Sequence 01 remain subjects: %i; sequence 02 remain subjects: %i",d_p_thre, #length(remain_dp_s1),length(remain_dp_s2))
for (irow in 1:dim(postsurvey)[1]){
for (irow_basic in 1:dim(basic)[1]){
if (postsurvey$SubID[irow]==basic$subID[irow_basic]){
postsurvey$basic_id[irow] = as.character(basic$subID[irow_basic])
postsurvey$d_p[irow] = basic$d_p[irow_basic]
postsurvey$feature[irow] = unique(totalData$feature_index[totalData$subID==postsurvey$SubID[irow]])
}
}
}
write.csv(postsurvey, file = paste0(postsurvey_dp_fname,".csv"))
##remove low dp 3rd run
num_lowdp = totalData %>% filter(subID %in% as.character(removeSubdp)) %>% group_by(feature_index) %>% summarize(num_trial = length(unique(subID)))
num_lowdp
## # A tibble: 3 x 2
## feature_index num_trial
## <fct> <int>
## 1 1 2
## 2 2 4
## 3 3 3
totalData = totalData%>% filter(!subID %in% removeSubdp)
by features – within Features
summarize = dplyr::summarize
##########
basicFeature = totalData %>%group_by(subID,feature_index)%>% summarize(
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S"),
adjFARate = 1/(2*sum(SameDiff=="D")),
adjHitRate = 1-(1/(2*sum(SameDiff=="S"))))
basicFeature$FARate[basicFeature$FARate==0]=basicFeature$adjFARate[basicFeature$FARate==0]
basicFeature$HitRate[basicFeature$HitRate[!is.na(basicFeature$HitRate)]==1]=basicFeature$adjHitRate[basicFeature$HitRate[!is.na(basicFeature$HitRate)]==1]
row_index = basicFeature$CRRate==1 & basicFeature$MissRate==1
basicFeature$FARate[row_index]=NA
basicFeature$HitRate[row_index]=NA
basicFeature$adjFARate[row_index]=NA
basicFeature$adjHitRate[row_index]=NA
row_index = basicFeature$CRRate==0 & basicFeature$MissRate==0
basicFeature$FARate[row_index]=NA
basicFeature$HitRate[row_index]=NA
basicFeature$adjFARate[row_index]=NA
basicFeature$adjHitRate[row_index]=NA
basicFeature = mutate(basicFeature, d_p = qnorm(HitRate)-qnorm(FARate))
###
basicFeature_longform = gather(basicFeature, response_type, number_resp, HitRate,FARate,CRRate,MissRate)
head(basicFeature_longform)
## # A tibble: 6 x 11
## # Groups: subID [6]
## subID feature_index hit fa cr miss adjFARate adjHitRate d_p
## <fct> <fct> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 "" 2 100 15 86 19 0.00476 0.996 1.89
## 2 1043… 3 117 13 92 9 0.00476 0.996 2.62
## 3 1050… 2 91 31 62 23 0.00476 0.996 1.13
## 4 1070… 1 123 21 84 3 0.00476 0.996 2.82
## 5 10f4… 3 124 5 100 2 0.00476 0.996 3.82
## 6 1106… 2 121 12 90 4 0.00476 0.996 2.96
## # ... with 2 more variables: response_type <chr>, number_resp <dbl>
ggplot(basicFeature_longform, aes(x = response_type, y = number_resp, color = response_type))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
facet_wrap(~feature_index)+
labs(title="Proportaion response type by feature", x = "response type", y = "proportion")+
theme_facet()

ggplot(basicFeature, aes(x = feature_index, y = d_p, group = feature_index, color = feature_index))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
plotformat

#ggplot(basicFeature, aes(x = subID, y = d_p, group = feature_index, color = feature_index))+
# geom_point(size=3)+
# plotformat
stats
basicFeature$feature_index[is.infinite(basicFeature$d_p)]
## factor(0)
## Levels: 1 2 3
basicFeature$subID[is.infinite(basicFeature$d_p)]
## factor(0)
## 80 Levels: 10438bd 1050095 1070d1e 10f4080 1106b05 1119c1a ... c223b3d
feature_anova = aov(d_p~feature_index,basicFeature)
summary(feature_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 3.11 1.5530 1.94 0.152
## Residuals 63 50.44 0.8006
by features by runs – within Features
summarize = dplyr::summarize
##########
basicFeature = totalData %>%group_by(subID,feature_index,run)%>% summarize(
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S"),
adjFARate = 1/(2*sum(SameDiff=="D")),
adjHitRate = 1-(1/(2*sum(SameDiff=="S"))))
basicFeature$FARate[basicFeature$FARate==0]=basicFeature$adjFARate[basicFeature$FARate==0]
basicFeature$HitRate[basicFeature$HitRate[!is.na(basicFeature$HitRate)]==1]=basicFeature$adjHitRate[basicFeature$HitRate[!is.na(basicFeature$HitRate)]==1]
row_index = basicFeature$CRRate==1 & basicFeature$MissRate==1
basicFeature$FARate[row_index]=NA
basicFeature$HitRate[row_index]=NA
basicFeature$adjFARate[row_index]=NA
basicFeature$adjHitRate[row_index]=NA
row_index = basicFeature$CRRate==0 & basicFeature$MissRate==0
basicFeature$FARate[row_index]=NA
basicFeature$HitRate[row_index]=NA
basicFeature$adjFARate[row_index]=NA
basicFeature$adjHitRate[row_index]=NA
basicFeature = mutate(basicFeature, d_p = qnorm(HitRate)-qnorm(FARate))
###
ggplot(basicFeature, aes(x = feature_index, y = d_p, group = feature_index, color = feature_index))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
facet_wrap(~run)+
plotformat

#ggplot(basicFeature, aes(x = subID, y = d_p, group = feature_index, color = feature_index))+
# geom_point(size=3)+
# plotformat
stats
basicFeature_run3 = basicFeature %>% filter(run==3, !is.infinite(d_p))
anovaFeature3rdRun = aov(d_p~feature_index,basicFeature_run3)
summary(anovaFeature3rdRun)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 2.28 1.139 1.044 0.358
## Residuals 63 68.79 1.092
summarize = dplyr::summarize
compareFeature3rdRun = basicFeature %>%filter(run==3,!is.infinite(d_p)) %>% group_by(feature_index) %>%summarize(meanD_p = mean(d_p),std =sd(d_p),med = median(d_p) )
compareFeature3rdRun
## # A tibble: 3 x 4
## feature_index meanD_p std med
## <fct> <dbl> <dbl> <dbl>
## 1 1 3.31 0.838 3.35
## 2 2 2.88 1.25 2.87
## 3 3 3.24 1.01 3.53
#top 10 subjects
topdp = quantile(basicFeature$d_p,90/100)
tempBasicFeature = basicFeature %>% filter(run==3)
top10 = tempBasicFeature %>%filter(d_p>=topdp)
top10
## # A tibble: 16 x 14
## # Groups: subID, feature_index [16]
## subID feature_index run hit fa cr miss HitRate FARate CRRate
## <fct> <fct> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 1043… 3 3 42 0 35 0 0.988 0.0143 1
## 2 1545… 1 3 42 0 35 0 0.988 0.0143 1
## 3 16c2… 3 3 42 1 34 0 0.988 0.0286 0.971
## 4 174f… 3 3 41 0 34 1 0.976 0.0143 0.971
## 5 175a… 3 3 42 0 34 0 0.988 0.0143 0.971
## 6 1791… 1 3 42 0 35 0 0.988 0.0143 1
## 7 1919… 1 3 42 0 35 0 0.988 0.0143 1
## 8 1934… 2 3 42 0 35 0 0.988 0.0143 1
## 9 1b47… 3 3 42 1 34 0 0.988 0.0286 0.971
## 10 1bee… 2 3 42 0 35 0 0.988 0.0143 1
## 11 1d48… 2 3 42 0 35 0 0.988 0.0143 1
## 12 1da7… 3 3 42 1 34 0 0.988 0.0286 0.971
## 13 1df9… 2 3 42 0 35 0 0.988 0.0143 1
## 14 1e20… 1 3 42 1 34 0 0.988 0.0286 0.971
## 15 1f94… 1 3 42 0 35 0 0.988 0.0143 1
## 16 c223… 2 3 42 0 35 0 0.988 0.0143 1
## # ... with 4 more variables: MissRate <dbl>, adjFARate <dbl>,
## # adjHitRate <dbl>, d_p <dbl>
within a feature: levels
summarize = dplyr::summarize
basicLevel = totalData %>% filter(keys>=0)%>% group_by(subID,level_diff)%>%
summarize(
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S"))
####
basicLevel_longform = gather(basicLevel, response_type, resp_rate, HitRate,FARate,CRRate,MissRate)
head(basicLevel_longform)
## # A tibble: 6 x 8
## # Groups: subID [1]
## subID level_diff hit fa cr miss response_type resp_rate
## <fct> <fct> <int> <int> <int> <int> <chr> <dbl>
## 1 "" 0 100 0 0 19 HitRate 0.840
## 2 "" 1 0 7 13 0 HitRate NaN
## 3 "" 2 0 2 19 0 HitRate NaN
## 4 "" 3 0 1 19 0 HitRate NaN
## 5 "" 4 0 4 16 0 HitRate NaN
## 6 "" 5 0 1 19 0 HitRate NaN
ggplot(basicLevel_longform, aes(x = response_type, y = resp_rate, color = response_type))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
facet_wrap(~level_diff,nrow = 1)+
labs(title="Proportaion response type by difficulty level", x = "response type", y = "proportion")+
theme_facet()
## Warning: Removed 792 rows containing non-finite values (stat_boxplot).
## Warning: Removed 792 rows containing missing values (geom_point).

###########
basicLevelFeature = totalData %>% filter(keys>=0)%>%group_by(subID,level_diff,feature_index)%>%
summarize( accuracy = sum(accuracy)/length(subID),
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S"))
basicLevelFeature_longform = gather(basicLevelFeature, response_type, resp_rate, HitRate,FARate,CRRate,MissRate)
head(basicLevelFeature_longform)
## # A tibble: 6 x 10
## # Groups: subID, level_diff [6]
## subID level_diff feature_index accuracy hit fa cr miss
## <fct> <fct> <fct> <dbl> <int> <int> <int> <int>
## 1 "" 0 2 0.840 100 0 0 19
## 2 "" 1 2 0.65 0 7 13 0
## 3 "" 2 2 0.905 0 2 19 0
## 4 "" 3 2 0.95 0 1 19 0
## 5 "" 4 2 0.8 0 4 16 0
## 6 "" 5 2 0.95 0 1 19 0
## # ... with 2 more variables: response_type <chr>, resp_rate <dbl>
ggplot(basicLevelFeature_longform, aes(x = response_type, y = resp_rate, color = response_type))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
facet_wrap(feature_index~level_diff,nrow = 3,strip.position = "bottom")+
labs(title="Proportaion resp type within Feature by difficulty level", x = "level diff", y = "proportion")+
theme_facet()
## Warning: Removed 792 rows containing non-finite values (stat_boxplot).
## Warning: Removed 792 rows containing missing values (geom_point).

ggplot(basicLevelFeature, aes(x=feature_index,y=accuracy,color = feature_index))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
facet_wrap(~level_diff)+
plotformat

ggplot(basicLevelFeature, aes(x=level_diff,y=accuracy,color = feature_index))+
geom_boxplot(fill = "white",lwd = 1)+
plotformat

ggplot(filter(basicLevelFeature,feature_index==1), aes(x=level_diff,y=accuracy))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 01", x = "level diff", y = "accuracy")+
ylim(0,1)+
plotformat
## Warning: Removed 21 rows containing missing values (geom_point).

ggplot(filter(basicLevelFeature,feature_index==2), aes(x=level_diff,y=accuracy))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 02", x = "level diff", y = "accuracy")+
ylim(0,1)+
plotformat
## Warning: Removed 11 rows containing missing values (geom_point).

ggplot(filter(basicLevelFeature,feature_index==3), aes(x=level_diff,y=accuracy))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 03", x = "level diff", y = "accuracy")+
ylim(0,1)+
plotformat
## Warning: Removed 9 rows containing missing values (geom_point).

stats
anovaFeatureLevel = aov(accuracy~feature_index+level_diff,basicLevelFeature)
summary(anovaFeatureLevel)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 0.306 0.1532 7.87 0.000446 ***
## level_diff 5 5.296 1.0591 54.40 < 2e-16 ***
## Residuals 388 7.553 0.0195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summarize = dplyr::summarize
compareFeatures = basicLevelFeature %>%group_by(feature_index,level_diff) %>%summarize(meanAccuracy = mean(accuracy),std =sd(accuracy) )
compareFeatures
## # A tibble: 18 x 4
## # Groups: feature_index [?]
## feature_index level_diff meanAccuracy std
## <fct> <fct> <dbl> <dbl>
## 1 1 0 0.945 0.0502
## 2 1 1 0.651 0.254
## 3 1 2 0.868 0.0895
## 4 1 3 0.935 0.0678
## 5 1 4 0.954 0.0560
## 6 1 5 0.960 0.0504
## 7 2 0 0.918 0.0680
## 8 2 1 0.590 0.274
## 9 2 2 0.878 0.117
## 10 2 3 0.903 0.109
## 11 2 4 0.925 0.0851
## 12 2 5 0.930 0.0924
## 13 3 0 0.913 0.0878
## 14 3 1 0.561 0.239
## 15 3 2 0.825 0.169
## 16 3 3 0.859 0.130
## 17 3 4 0.873 0.116
## 18 3 5 0.879 0.151
separate by run
basicLevelFeature = totalData %>% group_by(subID,level_diff,feature_index,run)%>%
summarize( accuracy = sum(accuracy)/length(subID),
hit = sum(keys==1&SameDiff=="S"),
fa= sum(keys==1&SameDiff=="D"),
cr = sum(keys==0&SameDiff=="D"),
miss = sum(keys==0&SameDiff=="S"),
HitRate =sum(keys==1&SameDiff=="S")/sum(SameDiff=="S"),
FARate = sum(keys==1&SameDiff=="D")/sum(SameDiff=="D"),
CRRate = sum(keys==0&SameDiff=="D")/sum(SameDiff=="D"),
MissRate = sum(keys==0&SameDiff=="S")/sum(SameDiff=="S")) #filter(keys>=0)%>%
basicLevelFeature_longform = gather(basicLevelFeature, response_type, resp_rate, HitRate,FARate,CRRate,MissRate)
head(basicLevelFeature_longform)
## # A tibble: 6 x 11
## # Groups: subID, level_diff, feature_index [2]
## subID level_diff feature_index run accuracy hit fa cr miss
## <fct> <fct> <fct> <int> <dbl> <int> <int> <int> <int>
## 1 "" 0 2 1 0.929 39 0 0 3
## 2 "" 0 2 2 0.738 33 0 0 7
## 3 "" 0 2 3 0.548 28 0 0 9
## 4 "" 1 2 1 0.857 0 1 6 0
## 5 "" 1 2 2 0.857 0 1 6 0
## 6 "" 1 2 3 0 0 5 1 0
## # ... with 2 more variables: response_type <chr>, resp_rate <dbl>
ggplot(filter(basicLevelFeature,feature_index==1), aes(x=level_diff,y=accuracy,group = level_diff,color=subID))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 01", x = "level diff", y = "accuracy")+
ylim(0,1)+
facet_wrap(~run)+
plotformat
## Warning: Removed 97 rows containing missing values (geom_point).

ggplot(filter(basicLevelFeature,feature_index==2), aes(x=level_diff,y=accuracy,group = level_diff,color=subID))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 02", x = "level diff", y = "accuracy")+
ylim(0,1)+
facet_wrap(~run)+
plotformat
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 75 rows containing missing values (geom_point).

ggplot(filter(basicLevelFeature,feature_index==3), aes(x=level_diff,y=accuracy,group = level_diff,color=subID))+
geom_boxplot(fill = "white",lwd = 1)+
geom_jitter(width=0.2,alpha = 0.5)+
labs(title="Feauture 03", x = "level diff", y = "accuracy")+
ylim(0,1)+
facet_wrap(~run)+
plotformat
## Warning: Removed 102 rows containing missing values (geom_point).

stats
anovaFeatureLevel = aov(accuracy~feature_index+level_diff+run,basicLevelFeature)
summary(anovaFeatureLevel)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 0.58 0.291 6.927 0.00102 **
## level_diff 5 14.25 2.851 67.793 < 2e-16 ***
## run 1 3.54 3.540 84.190 < 2e-16 ***
## Residuals 1179 49.58 0.042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anovaFeatureLevel3rdRun = aov(accuracy~feature_index+level_diff,filter(basicLevelFeature,run==3))
summary(anovaFeatureLevel3rdRun)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 0.219 0.1096 3.189 0.0423 *
## level_diff 5 4.325 0.8650 25.160 <2e-16 ***
## Residuals 388 13.340 0.0344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anovaFeatureLevel3rdRun_same = aov(accuracy~feature_index,filter(basicLevelFeature,run==3 & level_diff==0))
summary(anovaFeatureLevel3rdRun_same)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 0.0502 0.02509 1.795 0.175
## Residuals 63 0.8806 0.01398
SameaccuracyF1 = filter(basicLevelFeature,feature_index==1,run==3 & level_diff==0)
SameaccuracyF2 = filter(basicLevelFeature,feature_index==2,run==3 & level_diff==0)
SameaccuracyF3 = filter(basicLevelFeature,feature_index==3,run==3 & level_diff==0)
t.test(SameaccuracyF1$accuracy,SameaccuracyF2$accuracy)
##
## Welch Two Sample t-test
##
## data: SameaccuracyF1$accuracy and SameaccuracyF2$accuracy
## t = 1.6226, df = 30.873, p-value = 0.1148
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01545197 0.13563338
## sample estimates:
## mean of x mean of y
## 0.9342404 0.8741497
t.test(SameaccuracyF1$accuracy,SameaccuracyF3$accuracy)
##
## Welch Two Sample t-test
##
## data: SameaccuracyF1$accuracy and SameaccuracyF3$accuracy
## t = 0.057834, df = 41.232, p-value = 0.9542
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05767587 0.06107723
## sample estimates:
## mean of x mean of y
## 0.9342404 0.9325397
t.test(SameaccuracyF2$accuracy,SameaccuracyF3$accuracy)
##
## Welch Two Sample t-test
##
## data: SameaccuracyF2$accuracy and SameaccuracyF3$accuracy
## t = -1.455, df = 37.44, p-value = 0.154
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.13966894 0.02288889
## sample estimates:
## mean of x mean of y
## 0.8741497 0.9325397
anovaFeatureLevel3rdRun_l01 = aov(accuracy~feature_index,filter(basicLevelFeature,run==3 & level_diff==1))
summary(anovaFeatureLevel3rdRun_l01)
## Df Sum Sq Mean Sq F value Pr(>F)
## feature_index 2 0.181 0.09063 0.86 0.428
## Residuals 63 6.642 0.10544
summarize = dplyr::summarize
compareFeatures = basicLevelFeature %>% filter(run==3) %>% group_by(feature_index,level_diff) %>%summarize(meanAccuracy = mean(accuracy),std =sd(accuracy),med = median(accuracy) )
compareFeatures
## # A tibble: 18 x 5
## # Groups: feature_index [?]
## feature_index level_diff meanAccuracy std med
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1 0 0.934 0.0811 0.952
## 2 1 1 0.721 0.237 0.714
## 3 1 2 0.891 0.180 1
## 4 1 3 0.966 0.0770 1
## 5 1 4 0.973 0.0731 1
## 6 1 5 0.925 0.154 1
## 7 2 0 0.874 0.149 0.929
## 8 2 1 0.592 0.381 0.714
## 9 2 2 0.884 0.179 1
## 10 2 3 0.905 0.138 1
## 11 2 4 0.884 0.205 1
## 12 2 5 0.932 0.133 1
## 13 3 0 0.933 0.115 0.976
## 14 3 1 0.637 0.337 0.643
## 15 3 2 0.881 0.244 1
## 16 3 3 0.958 0.107 1
## 17 3 4 0.964 0.0868 1
## 18 3 5 0.952 0.109 1